# Write custom skim function

# Summarise a categorical vector as a single string of per-level
# percentages, e.g. "a: 66.7%; b: 33.3%".
#
# x: a factor or character vector.
#
# Returns a length-one character vector. Levels are tabulated with table()
# under its default settings, so the percentages are shares of the values
# that table() counts. When table() counts nothing (e.g. every value is
# missing) NA is returned instead of "NaN%" entries or an empty string,
# neither of which can be split back into level/percent pairs.
format_percents <- function(x) {
  counts <- table(x)

  # Nothing to tabulate: avoid dividing by zero in prop.table()
  if (sum(counts) == 0L) {
    return(NA_character_)
  }

  # Generate percentage table
  percent_table <- round(prop.table(counts) * 100, 1)

  # Clean up values
  values <- sprintf("%.1f%%", percent_table)

  # Bind names with values and return
  paste(
    names(percent_table), values,
    sep = ": ", collapse = "; "
  )
}

# Skim function that adds a `percents` summary to factor and character
# columns; both types share the same formatter so they cannot drift apart.
percent_skim <- skim_with(
  factor = sfl(percents = format_percents),
  character = sfl(percents = format_percents)
)

# Read each study's saved data, keeping only the demographic columns.
# The panel data additionally keeps the participant id.
panel_data <- select(
  readRDS("Data/panel/panel_data.rds"),
  id, age, gender, educ, race
)

exp1_data <- select(readRDS("Data/exp_1/data.rds"), age, gender, educ, race)

exp2_data <- select(readRDS("Data/exp_2/data.rds"), age, gender, educ, race)

exp3_data <- select(readRDS("Data/exp_3/data.rds"), age, gender, educ, race)

# Tag every dataset with the name of the study it belongs to
panel_data <- mutate(panel_data, study = "Panel")
exp1_data <- mutate(exp1_data, study = "Experiment 1")
exp2_data <- mutate(exp2_data, study = "Experiment 2")
exp3_data <- mutate(exp3_data, study = "Experiment 3")

# Keep only the first row seen for each panel participant
is_first_row <- !duplicated(panel_data$id)
panel_data <- panel_data[is_first_row, , drop = FALSE]

# The id column is no longer needed once participants are unique
panel_data <- select(panel_data, -id)

# Stack the four studies into one data frame, then give the demographic
# columns their display names
bound_data <- list(panel_data, exp1_data, exp2_data, exp3_data) %>%
  bind_rows() %>%
  rename(Age = age, Education = educ, Gender = gender, Race = race)

# Generate sample demographics table

# Skim summary computed separately for each study
skimmed <- bound_data %>%
  group_by(study) %>% # Group by study
  percent_skim() %>% # Generate percentage table
  as.data.frame()

# percent_skim() defines a `percents` summary for factor AND character
# columns. Reading only factor.percents would leave character demographics
# as NA (or fail outright when no factor column exists), so use whichever
# percentage columns are present.
# NOTE(review): character.percents is assumed to follow the same
# <type>.<summary> naming as factor.percents -- confirm against skim output.
percent_cols <- intersect(
  c("factor.percents", "character.percents"),
  names(skimmed)
)
if (length(percent_cols) == 0L) {
  stop(
    "percent_skim() produced no factor or character percentage column.",
    call. = FALSE
  )
}
skimmed$percents <- do.call(coalesce, unname(as.list(skimmed[percent_cols])))

demo_table <- skimmed %>%
  select(skim_variable, study, percents) %>%
  # Variables with no percentage summary would otherwise become an
  # all-NA row in the final table
  filter(!is.na(percents)) %>%
  separate_longer_delim( # One row per "Level: Percent" entry
    percents,
    delim = "; "
  ) %>%
  separate_wider_delim( # Split each entry into its two parts
    percents,
    delim = ": ",
    names = c("Level", "Percent")
  ) %>%
  pivot_wider( # One column of percentages per study
    names_from = "study",
    values_from = Percent
  ) %>%
  mutate( # Show each demographic name only on its first row
    skim_variable = ifelse(duplicated(skim_variable), "", skim_variable)
  ) %>%
  rename( # Rename skim_variable
    Attribute = skim_variable
  )

# Write the demographics table to a text file
datasummary_df(
  demo_table,
  title = "Sample Demographics by Study",
  output = "Tables/sample_demos.txt"
)
